<?php 
	/**
	 * Main crawler for jycn.cn joke listings.
	 *
	 * Requests "$in&page=N" for N = 1..$pagenumber, collects every link whose
	 * href starts with "/htm/i", and stores the links that are not yet in
	 * dazhan_dourls and whose absolute URL starts with $caiurl. Paging stops
	 * early as soon as a page yields no link that is new to the table (this
	 * includes a page without any matching link).
	 *
	 * @param string $in         Listing URL; "&page=N" is appended to it.
	 * @param string $caiurl     URL prefix a link must have in order to be stored.
	 * @param mixed  $colid      Column id saved together with each stored URL.
	 * @param int    $pagenumber Highest listing page to request.
	 * @return false|null False when the link match array is missing, otherwise nothing.
	 */
	function jycn_crawl($in,$caiurl,$colid,$pagenumber){
		global $conn;
		$tblName = 'dazhan_dourls';
		$requestOptions = array(CURLOPT_REFERER => "http://search.baidu.com/search");

		for ($pageNo = 1; $pageNo <= $pagenumber; $pageNo++) {
			$html = makeRequest($in.'&page='.$pageNo, "GET", "", false, "", "", $requestOptions);

			// All whitespace is removed first, which is why the pattern reads "<ahref".
			$html = str_replace(array(" ","\t","\n","\r","&nbsp;"), "", $html);
			preg_match_all('%<ahref="(/htm/i.*?)"%i', $html, $links);
			if (!isset($links[1])) {
				return false;
			}

			$alreadyKnown = 0;
			foreach ($links[1] as $path) {
				$dourl = 'http://jycn.cn'.$path;
				$matchesTarget = (substr($dourl, 0, strlen($caiurl)) == $caiurl);
				$urlmd5 = md5($dourl);

				$sql = "select * from $tblName where `site`='jycn' and urlhash='$urlmd5' limit 1";		
				if ($conn->has_record($sql)) {
					$alreadyKnown++;
					continue;
				}
				if (!$matchesTarget) {
					continue;
				}

				// New URL under the wanted prefix: queue it as unfinished work.
				$record = array(
					"site" => 'jycn',
					"colid" => $colid,
					"righturl" => 1,
					"url" => $dourl,
					"urlhash" => $urlmd5,
					"finish" => 0,
				);
				$conn->query(Sql::insert($record, $tblName));
			}

			// Nothing new on this page, so later pages are not requested.
			if ($alreadyKnown == count($links[1])) {
				break;
			}
		}
	}
	/**
	 * Downloads every unfinished jycn URL of one column and stores it as an article.
	 *
	 * For each row of dazhan_dourls with the given colid, site 'jycn' and
	 * finish=0 the page is fetched, the content cell and the title cell are
	 * extracted, converted from GBK to UTF-8 and inserted into dazhan_article;
	 * the URL row is then flagged finish=1. Rows whose page lacks either cell
	 * are left untouched.
	 *
	 * @param mixed $rule  Not referenced by this function.
	 * @param mixed $colid Column id, interpolated directly into the select statement.
	 * @return void
	 */
	function jycn_data($rule,$colid){
		global $conn;
		$requestOptions = array(CURLOPT_REFERER => "http://search.baidu.com/search");
		$tblName = 'dazhan_dourls';

		$pending = $conn->query("select * from dazhan_dourls where colid=$colid and site='jycn' and finish=0");
		while($row = $conn->fetch_assoc($pending)){
			// Progress trace: "<id>-<url>" with no separator between rows.
			echo $row['id'].'-'.$row['url'];

			$html = makeRequest($row['url'], "GET", "", false, "", "", $requestOptions);
			$html = str_replace(array(" ","\t","\n","\r","&nbsp;"), "", $html);

			preg_match('/<tdclass="content1"height="330"valign="top">(.*?)window.external.AddFavorite/i', $html, $bodyMatch);
			if (!isset($bodyMatch[1])) {
				continue;
			}
			$articleBody = iconv('gbk','utf-8',strip_tags($bodyMatch[1]));

			preg_match('%<tdheight="35"align="center"class="title3"style="border-bottom:1pxsolid#CDE3C5">(.*?)</td>%i', $html,$titleMatch);
			if (!isset($titleMatch[1])) {
				continue;
			}
			$articleTitle = iconv('gbk','utf-8',$titleMatch[1]);

			// Store the article, then mark the source URL as done.
			$article = array(
				"title" => $articleTitle,
				"content" => $articleBody,
				"ctime" => time(),
			);
			$conn->query(Sql::insert($article, 'dazhan_article'));
			$conn->query("update $tblName set finish=1 where id=".$row['id']);
		}
	}
	/**
	 * Recursively records listing URLs reachable through "more.asp?second_id=" links.
	 *
	 * Every entry of $urls is resolved against $in. A URL whose hash is not yet
	 * in dazhan_dourls is inserted (righturl = 1 when it starts with $caiurl,
	 * otherwise 0), then fetched, and the "more.asp?second_id=..." links found
	 * on that page are processed the same way. A URL is inserted before its
	 * page is followed, so a link back to it is seen as existing and is not
	 * followed again.
	 *
	 * @param string   $in     Base URL that the relative links are appended to.
	 * @param string[] $urls   Relative URLs (no leading slash) to process.
	 * @param string   $caiurl URL prefix that marks a "right" URL.
	 * @return bool|null True when every given URL was already stored, false when
	 *                   the sub-link match array is missing, otherwise nothing.
	 */
	function catchallurls($in,$urls,$caiurl){
		global $conn;
		$tblName = 'dazhan_dourls';
		$opt_arr = array(CURLOPT_REFERER => "http://search.baidu.com/search");
		$sumurls = count($urls);
		$urlword = strlen($caiurl);
		$have = 0;
		foreach ($urls as $url) {
			$dourl = $in.'/'.$url;
			$righturl = (substr($dourl, 0, $urlword) == $caiurl) ? 1 : 0;
			$urlmd5 = md5($dourl);
			$sql = "select * from $tblName where `site`='jycn' and urlhash='$urlmd5' limit 1";		
			if ($conn->has_record($sql)) {
				$have++;
				continue;
			}

			// Store the URL first so that links pointing back to it stop the recursion.
			$arr = array(
				"site" => 'jycn',
				"righturl" => $righturl,
				"url" => $dourl,
				"urlhash" => $urlmd5,
				"finish" => 0,
			);
			$sql = Sql::insert($arr, $tblName);
			$conn->query($sql);

			$page = makeRequest($dourl, "GET", "", false, "", "", $opt_arr);
			$page = str_replace(array(" ","\t","\n","\r","&nbsp;"), "", $page);

			// "." and "?" must be escaped: unescaped, "p?" made the "p" optional
			// and the literal "?" of a real link could never be matched, so no
			// sub-link was ever found.
			preg_match_all('%<ahref="/(more\.asp\?second_id=.*?)"%i', $page, $suburls);

			if (!isset($suburls[1])) {
				return false;
			}
			catchallurls($in,$suburls[1],$caiurl);
		}
		if ($have == $sumurls) {
			return true;
		}
	}
	/**
	 * Parses one haha.mx listing page and stores the jokes that are not yet known.
	 *
	 * All whitespace is stripped from the page first, so every pattern below is
	 * written without spaces. Each joke block yields its text, its id (0 when
	 * no id is found), picture URLs, author, post time and up-vote count, and
	 * is inserted into jiaxin_weibo_timeline with site = 1.
	 *
	 * @param string $page Raw HTML of the listing page.
	 * @return int|null 2 when a not-yet-stored joke is reached after at least
	 *                  three already-stored jokes on this page; otherwise nothing.
	 */
	function doWithHaHaPage($page){
		global $conn;
		$tblName = 'jiaxin_weibo_timeline';
		$page = str_replace(array(" ","\t","\n","\r","&nbsp;"), "", $page);
		$have = 0;
		preg_match_all('%<divclass="blockjoke-item".*?>(.*?)</div></div></div>%i', $page,$match);
		if (!isset($match[1]) || !is_array($match[1])) {
			return;
		}

		foreach ($match[1] as $value) {
			preg_match('%<pclass="textword-wrap">(.*?)</p>%i', $value,$content);
			if (!isset($content[1]) || $content[1] === '') {
				continue;
			}
			$joke_content = str_replace('绿', '顶', $content[1]);
			$joke_content = str_replace('红', '踩', $joke_content);

			preg_match('%data="\{\'url\':http://www.haha.mx/joke/(\d+)%i', $value,$matchid);
			$joke_id = isset($matchid[1]) ? $matchid[1] : 0;

			// Rows are stored with site = 1, so the duplicate check has to query
			// the same value; it used to compare against 'haha' and never matched.
			$sql = "select * from $tblName where `site`=1 and weiboid='$joke_id' limit 1";		
			if ($conn->has_record($sql)) {
				$have++;
				continue;
			}
			// NOTE(review): this limit is only evaluated when a new joke is
			// reached, so a page consisting solely of known jokes returns nothing.
			if ($have >= 3) {
				return 2;
			}

			preg_match('/data=".*?\'pic\':(.*?)\}/i', $value,$pic);
			if (isset($pic[1])) {
				$bmiddle_pic = $pic[1];
				$picurl = str_replace('middle', 'big', $bmiddle_pic);
				$thumbnail_pic = str_replace('middle', 'small', $bmiddle_pic);
			} else {
				$bmiddle_pic = $picurl = $thumbnail_pic = '';
			}

			// Reset per joke so that a missed pattern neither reads an undefined
			// variable nor reuses the previous joke's author, time or votes.
			$username = '';
			$posttime = 0;
			$dinghotnum = 0;

			preg_match('%<ahref=\'/user/\d+\'>(.*?)</a><span>(.*?)发布%i', $value,$user);
			if (isset($user[1], $user[2])) {
				$username = $user[1];
				$parsed = strtotime($user[2]);
				if ($parsed !== false) {
					$posttime = $parsed;
				}
			}

			preg_match('%data="g">(\d+)</a>%i', $value,$dinghot);
			if (isset($dinghot[1])) {
				$dinghotnum = $dinghot[1];
			}

			// Store the joke; the down-vote count is always saved as 0.
			$arr = array(
				"uid" => 1,
				"type" => 2,
				"site" => 1,
				"content" => $joke_content,
				"md5" => md5($joke_content),
				"weiboid" => $joke_id,
				"bmiddle_pic" => $bmiddle_pic,
				"picurl" => $picurl,
				"thumbnail_pic" => $thumbnail_pic,
				"username" => $username,
				"posttime" => $posttime,
				"crawltime" => time(),
				"dinghot" => $dinghotnum,
				"cai" => 0,
				"flag" => 0,
			);
			$sql = Sql::insert($arr, $tblName);
			$conn->query($sql);
		}
	}
	/**
	 * Returns the largest joke_id stored in jiaxin_joke for site 'qihu'.
	 *
	 * @return mixed Whatever $conn->result() yields for the max() query.
	 */
	function getQihuMaxId()
	{
		global $conn;
		return $conn->result("select max(joke_id) from jiaxin_joke where site='qihu'");
	}
	/**
	 * Main crawler for hao.360.cn jokes.
	 *
	 * Starting at $begin, requests youmoxiaohua.html?id=N, passes the page to
	 * doWithQihuPage(), prints the next id and waits five seconds before moving
	 * on. A failed request is repeated after five seconds without advancing
	 * the id. The script is terminated once doWithQihuPage() returns 2.
	 *
	 * @param int $begin First id to request.
	 * @return void This function only leaves through exit.
	 */
	function qihu_crawl($begin=1)
	{
		$requestOptions = array(CURLOPT_REFERER => "http://hao.360.cn");
		$jokeId = $begin;

		for (;;) {
			$page = makeRequest('http://hao.360.cn/youmoxiaohua.html?id='.$jokeId, "GET", "", false, "", "", $requestOptions);
			if ($page == false) {
				// Request failed: wait, then try the same id again.
				sleep(5);
				continue;
			}

			if (doWithQihuPage($page) == 2) {
				echo 'over';
				exit;
			}

			$jokeId++;
			echo $jokeId."\n";
			sleep(5);
		}
	}
	/**
	 * Backup variant of the hao.360.cn page handler, currently a debug dump.
	 *
	 * As written, the function strips whitespace from $page, writes the result
	 * to qihu.txt in the working directory and terminates the script. None of
	 * the parsing or insert code after the exit is executed.
	 *
	 * @param mixed  $id   Joke id; only used by the unreachable insert code.
	 * @param string $page Raw HTML of the page.
	 * @return int|bool -2 or true in the unreachable part; in practice the script exits.
	 */
	function doWithQihuPagebak($id,$page){
		global $conn;
		$html = str_replace(array("\n","\r", " ", "\t"), "", $page);
		// Debug dump: the stripped page is written to disk and the script stops here.
		file_put_contents('qihu.txt', $html);
		exit;
		// ---- Unreachable while the exit above is in place ----
		preg_match('%<ddid="content"class="details">(.*?)</dd>%i', $html,$match);
		var_dump($match);
		// NOTE(review): trim()'s second argument is a character list, so this strips
		// any leading/trailing '<', 'p', '>' and '/' characters, not the literal tags.
		$content = trim($match[1],'<p></p>');
		// NOTE(review): '\r\n' is single-quoted, so a literal backslash sequence is
		// inserted rather than a line break; confirm which one is intended.
		$content = str_replace('</p><p>','\r\n',$content);
		$md5 = md5($content);
		// Skip jokes whose content hash is already stored.
		if(hasHash($md5)){
			return -2;
		}
		$arr = array(
				"joke_id" => $id,
				"content" => $content,
				"md5" => $md5,
				"site" => 'qihu',
				"ctime" => time(),
				);
		// NOTE(review): $tblName is never assigned in this function.
		$sql = Sql::insert($arr, $tblName);
		$conn->query($sql);
				
		return true;
	}
	/**
	 * Extracts the "var jokes = ...;" JSON list from a hao.360.cn page and
	 * stores every joke that is not yet in jiaxin_weibo_timeline (site = 2).
	 *
	 * Each list entry is read as an indexed array: [0] id, [1] text,
	 * [2] extra text appended after a comma unless it is '#', [3] optional
	 * picture URL. User name and up-vote count are filled with random
	 * placeholder values.
	 *
	 * @param string $page Raw HTML of the page.
	 * @return int 2 when more than five entries were already stored, otherwise 1
	 *             (also 1 when the page holds no decodable joke list).
	 */
	function doWithQihuPage($page){
		global $conn;
		$user = array('新浪微博','腾讯微博');
		$tblName = 'jiaxin_weibo_timeline';

		// A page without the script variable, or with JSON that does not decode
		// to a list, holds nothing to store. Previously this path read an
		// undefined $match[1] and called count() on null.
		if (!preg_match('/var jokes = (.*?);/i', $page, $match)) {
			return 1;
		}
		$data = json_decode($match[1]);
		if (!is_array($data) && !is_object($data)) {
			return 1;
		}

		$have = 0;
		foreach ($data as $value) {
			// Entries lacking an id or a text cannot be stored.
			if (!is_array($value) || !isset($value[0], $value[1]) || !is_scalar($value[0])) {
				continue;
			}

			// The id comes from a third-party page; escape it before it goes into
			// the statement. NOTE(review): addslashes() is a stopgap, switch to the
			// connection's own escaping if $conn offers one.
			$sql = "select * from $tblName where `site`=2 and weiboid='".addslashes((string)$value[0])."' limit 1";		
			if ($conn->has_record($sql)) {
				$have++;
				continue;
			}

			$joke_content = $value[1];
			if (isset($value[2]) && $value[2] != '#') {
				$joke_content .= ','.$value[2];
			}

			$arr = array(
				"uid" => 1,
				"type" => 2,
				"site" => 2,
				"content" => $joke_content,
				"md5" => md5($joke_content),
				"weiboid" => $value[0],
				"flag" => 0,
			);
			if (isset($value[3])) {
				// Only one picture URL is available; it is used for all three sizes.
				$arr['bmiddle_pic'] = $value[3];
				$arr['picurl'] = $value[3];
				$arr['thumbnail_pic'] = $value[3];
			}
			$arr['username'] = $user[array_rand($user, 1)];
			$arr['posttime'] = time();
			$arr['crawltime'] = time();
			$arr['dinghot'] = rand(200,1000);

			$sql = Sql::insert($arr, $tblName);
			$conn->query($sql);
		}

		if ($have > 5) {
			return 2;
		}
		return 1;
	}
	/**
	 * Tells whether a joke with the given content hash is already stored.
	 *
	 * NOTE(review): $value is placed into the statement unescaped; the callers
	 * visible in this file pass md5() output.
	 *
	 * @param string $value Hash to look up in jiaxin_weibo_timeline.md5.
	 * @return bool True when a matching row exists.
	 */
	function hasHash($value='')
	{
		global $conn;
		$tblName = 'jiaxin_weibo_timeline';
		return (bool) $conn->has_record("select * from $tblName where md5='$value' limit 1");
	}
	/**
	 * Tells whether an item has NOT been crawled yet.
	 *
	 * NOTE(review): $id and $site are placed into the statement unescaped.
	 *
	 * @param mixed $id   Value compared against the weiboid column.
	 * @param mixed $site Value compared against the site column (default 3).
	 * @return bool True when no matching row exists, false when one does.
	 */
	function haveNoThis($id,$site=3){
		global $conn;
		$tblName = 'jiaxin_weibo_timeline';
		$found = $conn->has_record("select * from $tblName where `site`='$site' and weiboid='$id' limit 1");
		return !$found;
	}
	
	/**
	 * Main crawler for the day.2345.com "gaoxiao" listing pages.
	 *
	 * Requests gaoxiao_1/ through gaoxiao_19/ in order and passes each page to
	 * doWithYiLePage(), waiting five seconds between pages. A failed request is
	 * repeated after five seconds until it succeeds. The script is terminated
	 * once doWithYiLePage() returns 2.
	 *
	 * @return void
	 */
	function yile_crawl()
	{
		$requestOptions = array(CURLOPT_REFERER => "http://search.baidu.com/search");

		for ($pageNo = 1; $pageNo < 20; $pageNo++) {
			$url = 'http://day.2345.com/gaoxiao_'.$pageNo.'/';

			// Keep requesting the same page until it can be fetched.
			do {
				$page = makeRequest($url, "GET", "", false, "", "", $requestOptions);
				$failed = ($page == false);
				if ($failed) {
					sleep(5);
				}
			} while ($failed);

			if (doWithYiLePage($page) == 2) {
				echo "over";
				exit;
			}
			sleep(5);
		}
	}
	/**
	 * Parses one day.2345.com listing page and stores the jokes that are new.
	 *
	 * All whitespace is stripped from the page first, so every pattern below is
	 * written without spaces. The n-th title block, the n-th vote pair and the
	 * n-th "/gaoxiao/<id>/" link are treated as belonging to the same joke. The
	 * stored weiboid is the link id from its 7th character on, and that same
	 * value is used for the duplicate check. Content is converted from GB2312
	 * to UTF-8; when the joke body contains a lazy-loaded image only the title
	 * is stored as text and the image URL fills the picture columns.
	 *
	 * Fixes relative to the previous revision: the leftover debug dump to
	 * yile.txt followed by exit is gone (it made the parser unreachable); the
	 * id list is no longer overwritten by the random user index; the duplicate
	 * check uses the value that is actually stored.
	 *
	 * @param string $page Raw HTML of the listing page.
	 * @return int 2 when more than five jokes on the page were already stored, otherwise 1.
	 */
	function doWithYiLePage($page){
		global $conn;
		$tblName = 'jiaxin_weibo_timeline';
		$user = array('新浪微博','腾讯微博');

		$page = str_replace(array(" ","\t","\n","\r","&nbsp;"), "", $page);
		preg_match_all('%<divclass="mod_bmt10">.*?<divclass="tb_title"><h4>(.*?)</h4><span>(.*?)</span></div>(.*?)</div><divclass="author">%i',$page,$matches);
		preg_match_all('%<ahref="#"class="voteUp">(.*?)</a><ahref="#"class="voteDown">(.*?)</a>%i',$page,$dingcai);
		preg_match_all('%<ahref="/gaoxiao/(.*?)/"%i',$page,$uids);

		$jokes = isset($matches[1]) ? $matches[1] : array();
		$ids = isset($uids[1]) ? $uids[1] : array();
		$have = 0;

		foreach ($jokes as $i => $joke) {
			if ($joke == '') {
				break;
			}
			if (!isset($ids[$i])) {
				continue;
			}

			// The same value is stored and looked up, otherwise duplicates are never found.
			$weiboid = (string) substr($ids[$i], 6);
			if (!haveNoThis($weiboid, 3)) {
				$have++;
				continue;
			}

			$arr = array(
				"uid" => 1,
				"type" => 2,
				"site" => 3,
				"weiboid" => $weiboid,
				"flag" => 0,
			);

			$body = isset($matches[3][$i]) ? $matches[3][$i] : '';
			if (preg_match('/<imgclass="lazyLoad".*?data-url="(.*?)"/i', $body, $pic)) {
				// Picture joke: text is the title only, the image fills all three sizes.
				$joke_content = iconv('gb2312', 'UTF-8', $joke);
				$picurl = 'http://day.2345.com/'.$pic[1];
				$arr['content'] = $joke_content;
				$arr['md5'] = md5($joke_content);
				$arr['bmiddle_pic'] = $picurl;
				$arr['picurl'] = $picurl;
				$arr['thumbnail_pic'] = $picurl;
			} else {
				$joke_content = iconv('gb2312', 'UTF-8', $joke.$body);
				$arr['content'] = $joke_content;
				$arr['md5'] = md5($joke_content);
			}

			$arr['username'] = $user[array_rand($user, 1)];
			// Whitespace stripping glued date and time together; a space is
			// re-inserted after every 10 characters before parsing.
			$arr['posttime'] = strtotime(chunk_split(isset($matches[2][$i]) ? $matches[2][$i] : '', 10, ' '));
			$arr['crawltime'] = time();
			$arr['dinghot'] = isset($dingcai[1][$i]) ? $dingcai[1][$i] : 0;

			$sql = Sql::insert($arr, $tblName);
			$conn->query($sql);
		}

		if ($have > 5) {
			return 2;
		}
		return 1;
	}
?>